import requests
from lxml import etree
import pymysql
import mysql
import logging
from 垃圾堆 import citylink
import time
def sheep(seconds=1):
    """Pause the current thread for a short while.

    Args:
        seconds: How long to sleep, in seconds. Defaults to 1, the delay
            this helper always used before it became configurable.
    """
    time.sleep(seconds)
# Request headers: identify as a desktop Chromium-based Edge browser.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/95.0.4638.69 Safari/537.36 Edg/95.0.1020.44"
    ),
}

# Fetch the city rows (including each city's link) from the database.
def cityinfo():
    """Return every row of the ``cityinfo`` table.

    Opens a fresh connection to the local ``py_sql_jianzhimao`` database,
    runs ``SELECT * FROM cityinfo`` and returns whatever ``fetchall()``
    yields (one entry per row).

    The cursor and the connection are released even when the query raises,
    so a failed call no longer leaks a connection.
    """
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration or environment variables.
    bd = pymysql.connect(host="localhost", user="root", password="zs233", database="py_sql_jianzhimao")
    try:
        with bd.cursor() as cursor1:
            cursor1.execute("SELECT * FROM cityinfo")
            return cursor1.fetchall()
    finally:
        bd.close()


# cityinfo()
# Pull just the city link out of each row; it becomes the front half of every area URL.
def area_html():
    """Collect the last column of every row returned by ``cityinfo()``.

    Returns:
        A list holding, in row order, the final field of each row.

    Side effect: the module-level name ``cityhtml`` ends up bound to the
    last value collected; it is left untouched when there are no rows.
    """
    global cityhtml
    links = [row[-1] for row in cityinfo()]
    if links:
        cityhtml = links[-1]
    return links

# area()
# " ".join('%s' %a for a in lists)

# Fetch each city's page, extract every sub-area's name and href, then prefix each href with the city link.
def area_info():
    """Scrape every city page and store its sub-areas through ``tianjiasuju``.

    For each URL produced by ``area_html()`` the page is downloaded, the
    anchor texts and hrefs under the third ``<li>`` of ``ul.box`` are
    extracted, the leading entry of both lists is dropped, every href is
    prefixed with the city URL, and each (name, link) pair is handed to
    ``tianjiasuju``.

    Scraping is best-effort: a city that fails at any step (download,
    parsing, empty result, database insert) is reported and skipped so the
    remaining cities are still processed. Only ``Exception`` subclasses are
    swallowed, so Ctrl-C and interpreter shutdown still stop the run.
    """
    for city_url in area_html():
        try:
            # The timeout keeps one unresponsive host from hanging the run.
            reps = requests.get(url=city_url, headers=headers, timeout=10)
            tree = etree.HTML(reps.text)
            sheep()  # pause between requests

            area_names = tree.xpath('//ul[@class="box"]/li[3]/a/text()')
            # Drop the leading "no limit" entry; pop() raises on an empty
            # list, which routes pages without any areas to the skip branch.
            area_names.pop(0)
            area_links = tree.xpath('//ul[@class="box"]/li[3]/a/@href')
            # Drop the matching "/" href of the "no limit" entry.
            area_links.pop(0)

            # City URL is the head, the area href is the tail.
            full_links = [city_url + href for href in area_links]

            # Going through a dict keeps one link per area name
            # (the last one wins when a name repeats).
            for name, link in dict(zip(area_names, full_links)).items():
                tianjiasuju(name, link)
        except Exception:
            # Some city pages are malformed or unreachable; skip them.
            print("shake it !")
            print("鱼鱼快动啊")
            # Record which city failed and why instead of hiding the cause.
            logging.exception("Skipping city page %s", city_url)



    #错的和字典
        # return reallylink
        # arealink = ['https://guangzhou.jianzhimao.com/'+i for i in arealink]#错的
        # arealink.pop(0)
        # dict_data = dict(zip(areaName, arealink))
        # print(dict_data)
        #
# areainfo()

#我企图用txt
        # areanamelist.append(areaName)
        # areanamelist.append(arealink)
        # str1="".join('%s' %a for a in areanamelist)
        # str2 = "".join(arealink)
        # print(areanamelist)
        # 保存到txt是什么xx
        # with open("E:/PycharmData/1.txt","w")as a:
        #     a.write(str1)
        #     a.close()
        # 保存到txt是什么xx结束su
        # print(str1)
# areainfo()
# print("finish")

# Insert a row into the database.

def tianjiasuju(areaName, reallylink):
    """Insert one (area name, area link) row into the ``areainfos`` table.

    Args:
        areaName: Text stored in the ``areaName`` column.
        reallylink: Text stored in the ``arealink`` column.

    The statement is assembled as plain text and passed to
    ``mysql.gengxinbiao``, which only receives the finished SQL string.
    Both values come from scraped pages, so they are escaped before being
    placed inside the quoted literals; otherwise a quote or backslash in the
    data would break the statement or allow SQL injection.
    """
    # Local import: only this function needs the escaping helper.
    from pymysql.converters import escape_string

    # NOTE(review): escape_string applies the default backslash-escaping
    # rules; confirm the server does not run with NO_BACKSLASH_ESCAPES.
    sql = """
    insert into areainfos(areaName,arealink)
    values
    ('%s','%s')
    """ % (escape_string(str(areaName)), escape_string(str(reallylink)))
    mysql.gengxinbiao(sql)


# Run the full scrape only when executed as a script, not on import.
if __name__ == "__main__":
    area_info()
    print("finish")
